/*
	Estimates elasticity of college graduate quality with respect to GDP per worker
	using a bootstrap approach to estimating z_c (and thus q_j)
	Output:	Figure B5a
			Figure B5b
*/
 

* Root folder of the replication package. The default is the machine the
* script was written on; define the global macro GD_ROOT before running
* this do-file to redirect every path below without editing the script.
* NOTE: despite its name, the local seed holds a folder, not an RNG seed.
local seed "C:\Users\jsock\Dropbox\Research\GD\International"
if "$GD_ROOT" != "" {
	local seed "$GD_ROOT"
}

* Input, output and scratch folders derived from the root folder
local dataPath "`seed'/Data"
local inputPath "`seed'/InputData"
local figurePath "`seed'/Replication/Figures"
local tablePath "`seed'/Replication/Tables"
local estimatePath "`seed'/Replication/Estimates"
local tempPath "`seed'/Replication/TempData"

* Number of bootstrap replications
local simulations = 500

* Seed the random-number generator so that the bootstrap draws made with
* runiform() further down, and therefore the exported estimates and
* figures, are reproducible from run to run
set seed 12345

********************************************************
* Build the country lookup of log average GDP per worker
* (merged later on both country of work and country of study)
********************************************************

preserve

	clear

	insheet using "`inputPath'/Exchange_rates_2022.csv" , comma

	* Restrict to the years 2010 through 2021
	keep if inrange(year, 2010, 2021)

	* Country mean of gdppw over that window, then logs
	bysort country_glassdoor: egen avg_gdppw = mean(gdppw)
	generate log_gdppw_uc = ln(avg_gdppw)

	* One row per country, without countries that lack a GDP figure
	bysort country_glassdoor: drop if _n > 1
	drop if missing(log_gdppw_uc)

	keep country_glassdoor iso log_gdppw_uc
	sort country_glassdoor

	save "`tempPath'/Country_gdppw.dta" , replace

restore

********************************************************
* Load the main salaries dataset (first pass, used for the migrant sample)
********************************************************

clear
set more off
set matsize 10000
set scheme s1mono

* Read the salary reports; v1 is the name insheet gave the first column
insheet using "`dataPath'/Salaries_international_dataset_main.csv", comma
drop v1

* Keep only reports that carry a job title
keep if jobtitle != ""

* Variables not used below
drop metro shortname city basecurrency country_iso sectorname iscurrentjobflag

*--------------------------------
* Thresholds used for sample selection
*--------------------------------

* Minimum number of migrants to top destinations that an origin country
* needs before it counts as having a valid country premium
scalar country_premia_thresh = 25 

* Not referenced again in this do-file
scalar selection_thresh = 25

* Minimum number of home-country observations a school needs
scalar school_thresh = 25

* Not referenced again in this do-file
scalar pctThresh = 0.05

*--------------------------------
* Drop users who left more than 10 salary reports
*--------------------------------

* Position of each report within the ordered reports of its user
bysort fk_userid (yearofsalary dateval salid): generate obsNum = _n

* Number of reports per user, needed only for the filter
by fk_userid: generate userReviews = _N

drop if userReviews > 10

drop userReviews

*--------------------------------
* Generate additional variables
*--------------------------------

* Reports without a PPP exchange rate cannot be converted
drop if missing(ppp_xrat)

* Experience and its square
generate exp = yearsofrelevantexpnumber
generate exp_sqrd = exp ^ 2
drop yearsofrelevantexpnumber

* Log of PPP-converted base salary and log GDP per worker
generate logbase = ln(basesalary * ppp_xrat)

generate log_gdppw = ln(gdppw)

*--------------------------------
* Exclude outliers in base pay
*--------------------------------

* Flag salaries outside [gdppw / 10, 10 * gdppw]; the rows are dropped later
scalar scalingThresh = 10

generate realbase = basesalary * ppp_xrat
generate outside_thresh = !((realbase >= (1/scalingThresh) * gdppw) & (realbase <= scalingThresh * gdppw))
drop realbase

*------------------------------------------
* Create Ranking Bins
*------------------------------------------

generate rankBin = ""

* Bin label with its lowest and highest world rank, matched by position
local binNames "A_01_20 B_21_50 C_51_100 D_101_250 E_251_500 F_501_1000 G_1001_2000"
local binFloor "1 21 51 101 251 501 1001"
local binCeil "20 50 100 250 500 1000 2000"

forvalues b = 1/7 {
	local binName : word `b' of `binNames'
	local lo : word `b' of `binFloor'
	local hi : word `b' of `binCeil'
	replace rankBin = "`binName'" if inrange(world_rank, `lo', `hi')
}

* Universities with a known country but no world rank
replace rankBin = "H_UNRANKED" if universitycountry != "" & world_rank == .

* National rank as a share of the number of universities in the country
generate national_rank_pct = national_rank / numberuniversities

*------------------------------------------
* Create Major of Study Bins
*------------------------------------------

generate majorStem = ""
replace majorStem = "Stem" if inlist(grpmajor, "Biological Sciences", "Engineering", "Physical Sciences", "Technology")
replace majorStem = "NonStem" if inlist(grpmajor, "Business", "Social Sciences", "Arts and Humanities", "Communication", "Education", "Health Service", "Social Service")

*-------------------------------- 
* Drop Singapore Polytechnics which aren't considered by WHED
*--------------------------------

drop if inlist(school, "Nanyang Polytechnic", "Ngee Ann Polytechnic", "Republic Polytechnic", ///
	"Singapore Polytechnic", "Temasek Polytechnic")

*--------------------------------
* Flag whether the wage is earned in the country of the university
*--------------------------------

* Both flags are 0 when the university country is unknown
generate home_country = (universitycountry != "") & (countryname == universitycountry)

generate foreign_country = (universitycountry != "") & (countryname != universitycountry)

*--------------------------------
* Generate indicators for valid in education analysis
*--------------------------------

* For first degree

* Degree field is filled in, matched, and above high school
generate hasDegree = degree != "UNMATCHED" & degree != "missing" & degree != ""  & degree != "HIGHSCHOOL"

* First degree is a bachelor degree
generate uniDegree = degree == "BACHELORS"  

generate hasSchool = school != ""

* For second degree

* Same test as hasDegree; every condition refers to degree_2, including
* the high-school exclusion
generate hasDegree_2 = degree_2 != "UNMATCHED" & degree_2 != "missing" & degree_2 != ""  & degree_2 != "HIGHSCHOOL"

* Second degree is none of the listed categories. As written this is also
* 1 when degree_2 is empty or unmatched.
generate uniDegree_2 = degree_2 != "ASSOCIATES" & degree_2 != "DIPLOMA"  & degree_2 != "HIGHSCHOOL" & degree_2 != "BACHELORS"

generate hasSchool_2 = school_2 != ""

* Label an absent second degree explicitly (after the flags above are set)
replace degree_2 = "NONE" if degree_2 == ""

*--------------------------------
* Attach ISO code and log GDP per worker of the country of work
*--------------------------------

* The lookup is keyed on country_glassdoor, so copy the key in temporarily
generate country_glassdoor = countryname
merge m:1 country_glassdoor using "`tempPath'/Country_gdppw.dta" , nogenerate
drop country_glassdoor

rename log_gdppw_uc log_gdppw_work_country
rename iso work_country_iso

*--------------------------------
* Attach ISO code and log GDP per worker of the country of study
*--------------------------------

generate country_glassdoor = universitycountry
merge m:1 country_glassdoor using "`tempPath'/Country_gdppw.dta" , nogenerate
drop country_glassdoor

rename log_gdppw_uc log_gdppw_university_country
rename iso university_country_iso

*--------------------------------
* Restrict to the usable observations
*--------------------------------

* No self-employed reports and no salary outliers
keep if employertypecode != "SELF_EMPLOYED"

keep if !outside_thresh

* Bachelor degree from a named school in a known country
generate valid_educ = uniDegree & hasDegree & hasSchool & universitycountry != ""
drop if !valid_educ

********************************************************
* SAMPLE SIZE FOR COUNTRY PREMIA Z_C (MOVERS)
********************************************************

*--------------------------------
* Determine top destinations
*--------------------------------

* Order the salary reports of each user
sort fk_userid yearofsalary dateval salid

* Country of the next report by the same user (empty on the last report)
by fk_userid: generate destination = countryname[_n+1]

* 1 when the next report is filed from a different country
generate migrant = destination != countryname & destination != ""

* Count the moves into each destination and keep that count on a single
* row per destination, so the total below counts every move once
sort destination 
by destination : egen destinationMigrants = sum(migrant)
by destination : replace destinationMigrants = . if destination == ""
by destination : replace destinationMigrants = . if _n > 1

* Negate the counts so the ascending sort lists the largest destination first
replace destinationMigrants = destinationMigrants * -1

sort destinationMigrants 

* Determine top destination (1.0% of migrants)

* The counts are negative here, so flip the sign back when totalling
egen totalMigrants = sum(destinationMigrants * -1)
	
generate migrant_share = -1 * destinationMigrants / totalMigrants
	
generate top_destination = migrant_share >= 0.01 & migrant_share != .

* Spread the flag from the single counted row to every row of the destination
bys destination : egen topDestination = max(top_destination)

*--------------------------------
* Determine # of migrants to top destinations (share rule above) for each origin
*--------------------------------

* Moves out of each country, overall and into top destinations only
bys countryname : egen migrantsOverall = sum(migrant)  

bys countryname : egen migrantsTop = sum(migrant * topDestination)  

* Origin countries with enough moves into top destinations
generate valid_country_premia = migrantsTop >= country_premia_thresh & migrantsTop != .

********************************************************
* ESTIMATE SKILL LOSS
********************************************************

* Order the salary reports of each user
sort fk_userid yearofsalary dateval salid

* Country of the previous report by the same user (empty on the first report)
by fk_userid: gen prev_country = countryname[_n-1]

*--------------------------------
* Determine if migrated
*--------------------------------

* 1 on a report filed from a different country than the previous report
generate migrated = countryname != prev_country & prev_country != ""

* Running count of moves by the user up to and including this report
by fk_userid: gen already_migrated = sum(migrated)

*--------------------------------
* Generate variable capturing delta_gdppw for migrants - between countries of work
*--------------------------------

* Absolute gap in log GDP per worker between this and the previous report
by fk_userid: gen prev_gdp = log_gdppw_work_country[_n-1]
generate delta_log_gdppw = abs(log_gdppw_work_country - prev_gdp)

* Zero before the first move; the gap itself on the report that follows a move
generate migrant_delta_gdppw_work = 0
replace migrant_delta_gdppw_work = delta_log_gdppw if migrated == 1

* Carry the gap of the latest move forward over the later reports filed
* without a further move. replace works through the observations in order,
* so a single lagged replace cascades down the spell whatever its length
* (rows before any move inherit the initial 0).
by fk_userid: replace migrant_delta_gdppw_work = migrant_delta_gdppw_work[_n-1] if migrated == 0 & _n > 1

*--------------------------------
* Generate coarse destination FE for migration 
*	Keep top destinations alone, group others by continent
*--------------------------------

	* Top destinations
	
	* Build a one-row-per-country lookup of the top-destination flag
	preserve

		keep destination topDestination
		rename destination countryname
		rename topDestination is_topDestination
		
		bys countryname : keep if _n == 1
		
		tempfile in_results
		save `in_results', emptyok
		
	restore

	* Attach the flag by country of work; lookup-only rows are discarded
	merge m:1 countryname using `in_results'
	drop if _merge == 2
	drop _merge

	* A top destination is its own category
	generate coarseDestination = ""
	replace coarseDestination = countryname if is_topDestination == 1
	drop is_topDestination

	* Continents
	
	* Build a country-to-continent lookup from columns v2 and v7 of the csv
	preserve

		clear
		
		insheet using "`inputPath'\Country_continents.csv" , comma
		
		rename v2 countryname
		rename v7 continent
		keep countryname continent
		
		bys countryname : keep if _n == 1
		
		* Rename the countries listed below to the spelling used in countryname
		replace countryname = "Czech Republic" if countryname == "Czechia"
		replace countryname = "Iran" if countryname == "Iran (Islamic Republic of)"
		replace countryname = "Russia" if countryname == "Russian Federation"
		replace countryname = "Hong Kong" if countryname == "China, Hong Kong Special Administrative Region"
		replace countryname = "South Korea" if countryname == "Republic of Korea"
		replace countryname = "United Kingdom" if countryname == "United Kingdom of Great Britain and Northern Ireland"
		replace countryname = "United States" if countryname == "United States of America"
		replace countryname = "Vietnam" if countryname == "Viet Nam"
		
		* Reuses the tempfile name; the top-destination lookup is no longer needed
		tempfile in_results
		save `in_results', emptyok
		
	restore

	merge m:1 countryname using `in_results'
	drop if _merge == 2
	drop _merge

	* Remaining countries of the premia sample are grouped by continent
	replace coarseDestination = continent if coarseDestination == "" & valid_country_premia
	
	* Destination FE
	
	* One dummy per category, switched off for users who have not moved yet
	tab coarseDestination if valid_country_premia == 1, gen(c_)
	foreach my_var of varlist c_*{
		replace `my_var' = 0 if ~(migrated | already_migrated >= 1)
	}	

********************************************************
* STEP 0: GET SAMPLE OF MIGRANTS AND CREATE BOOTSTRAP SAMPLES -- 48 COUNTRIES
********************************************************

* Baseline first-stage regression, run only to learn its estimation sample
reghdfe logbase exp exp_sqrd migrant_delta_gdppw_work if valid_country_premia , absorb(fk_userid countryname coarseDestination yearofsalary ) vce(cluster countryname)
	
* -1 inside the estimation sample so that those rows sort first
generate migrantSample = e(sample) * -1

* Row number within the estimation sample, missing outside it
bys migrantSample: gen obs_id = _n
replace obs_id = . if migrantSample == 0

preserve

	keep if obs_id != .

	* Number the users 1..M: migrant_id is filled on the first row of each
	* user only, id_migrant repeats it on every row of that user
	bys fk_userid: gen migrantRow = _n 
	
	sort migrantRow fk_userid
	
	by migrantRow: gen migrant_id = _n
	replace migrant_id = . if migrantRow > 1
	bys fk_userid : egen id_migrant = max(migrant_id)
	
	* OUTPUT SAMPLE FOR ESTIMATING IN FIRST STAGE
	
	order id_migrant obs_id, first

	sort id_migrant obs_id
	
	save "`tempPath'/Migrant_data.dta" , replace
	
	* CREATE BOOTSTRAP SAMPLES BY VECTORS OF MIGRANT IDS
	
	* One row per user
	keep if migrant_id != .
	
	keep migrant_id
	
	summarize migrant_id, meanonly
	scalar migrantSampleSize = r(max)

	rename migrant_id old_migrant_id
	
	forvalues ii = 1/`simulations' {
		
		* Draw M user ids with replacement. floor(M * u) + 1 puts equal
		* probability on each of 1..M; rounding (M - 1) * u would give the
		* two end ids only half the weight of every other id.
		generate random_id_`ii' = floor(migrantSampleSize * runiform()) + 1
		
		save "`tempPath'/Migrant_ids_`ii'.dta" , replace
		
		drop random_id_`ii' 

	}
	
	* COLLAPSE TO TOTAL INSTANCES FOR EACH MIGRANT
	
	clear
	
	forvalues ii = 1/`simulations' {
		
		use "`tempPath'/Migrant_ids_`ii'.dta" , clear
		
		* Number of times each user id was drawn, one row per drawn id
		bys random_id_`ii': gen count_id_`ii' = _N
		
		bys random_id_`ii': keep if _n == 1
		
		save "`tempPath'/Migrant_ids_`ii'.dta" , replace
		
	}
		
restore

********************************************************
* STEP 1: ESTIMATE COUNTRY FE Z_C FOR EACH MIGRANT SAMPLE
********************************************************

preserve

	forvalues ii = 1/`simulations' {
			
		use "`tempPath'/Migrant_data.dta" , clear
		
		* User id on every row, under the name used as key in the draw file
		bys fk_userid: egen random_id_`ii' = max(migrant_id)
		
		merge m:1 random_id_`ii' using  "`tempPath'/Migrant_ids_`ii'.dta"

		* Keep only the users drawn in this replication. Without this filter
		* the undrawn users have a missing count, and expand leaves rows with
		* a missing count in the data once, so they would all stay in the
		* bootstrap sample.
		keep if _merge == 3
		drop _merge
			
		* Repeat the rows of each drawn user as many times as it was drawn
		expand count_id_`ii'
		
		* Estimate z_c
		
		reghdfe logbase exp exp_sqrd migrant_delta_gdppw_work if valid_country_premia , absorb(fk_userid fe_country=countryname coarseDestination yearofsalary ) vce(cluster countryname)
		/*
		generate tau = 0
		foreach my_var of varlist c_* {
			
			bys coarseDestination : egen fill_`my_var' = max(`my_var')
			replace tau = tau + `my_var' * _b["`my_var'"] 
			replace tau = tau + fill_`my_var' * _b["`my_var'"] if `my_var' == 0 & universitycountry != countryname 
			drop fill_*
			
		}
		foreach my_var of varlist migrant_delta_gdppw_work {

			replace tau = tau + `my_var' * _b["`my_var'"]
			replace tau = tau + abs(log_gdppw_work_country - log_gdppw_university_country) * _b["`my_var'"] if `my_var' == 0 & universitycountry != countryname 

		}
		*/
		* One row per country with an estimated fixed effect
		keep if fe_country != .
		
		bys countryname : keep if _n == 1
		
		keep countryname fe_country //tau
		
		rename fe_country z_c_`ii'
		*rename tau tau_`ii'

		save "`tempPath'/Estimate_z_c_`ii'.dta" , replace
	
	}
		
restore

* The first-stage sample is not needed once every replication is estimated
erase "`tempPath'/Migrant_data.dta"
		
********************************************************
* STEP 2: READ BACK IN DATASET AND KEEP ONLY EDUCATION SAMPLE
********************************************************

	clear
	set more off
	set matsize 10000
	set scheme s2mono

	* Read the salary reports again; v1 is the name insheet gave the first column
	insheet using "`dataPath'/Salaries_international_dataset_main.csv", comma
	drop v1

	* Keep only reports that carry a job title
	keep if jobtitle != ""

	* Variables not used below
	drop metro shortname city basecurrency country_iso sectorname iscurrentjobflag

	*--------------------------------
	* Thresholds used for sample selection
	*--------------------------------

	* Minimum number of migrants to top destinations that an origin country
	* needs before it counts as having a valid country premium
	scalar country_premia_thresh = 25

	* Not referenced again in this do-file
	scalar selection_thresh = 25

	* Minimum number of home-country observations a school needs
	scalar school_thresh = 25

	* Not referenced again in this do-file
	scalar pctThresh = 0.05

	*--------------------------------
	* Drop users who left more than 10 salary reports
	*--------------------------------

	* Position of each report within the ordered reports of its user
	bysort fk_userid (yearofsalary dateval salid): generate obsNum = _n

	* Number of reports per user, needed only for the filter
	by fk_userid: generate userReviews = _N

	drop if userReviews > 10

	drop userReviews

	*--------------------------------
	* Generate additional variables
	*--------------------------------

	* Reports without a PPP exchange rate cannot be converted
	drop if missing(ppp_xrat)

	* Experience and its square
	generate exp = yearsofrelevantexpnumber
	generate exp_sqrd = exp ^ 2
	drop yearsofrelevantexpnumber

	* Log of PPP-converted base salary and log GDP per worker
	generate logbase = ln(basesalary * ppp_xrat)

	generate log_gdppw = ln(gdppw)

	*--------------------------------
	* Exclude outliers in base pay
	*--------------------------------

	* Flag salaries outside [gdppw / 10, 10 * gdppw]; the rows are dropped later
	scalar scalingThresh = 10

	generate realbase = basesalary * ppp_xrat
	generate outside_thresh = !((realbase >= (1/scalingThresh) * gdppw) & (realbase <= scalingThresh * gdppw))
	drop realbase

	*------------------------------------------
	* Create Ranking Bins
	*------------------------------------------

	generate rankBin = ""

	* Bin label with its lowest and highest world rank, matched by position
	local binNames "A_01_20 B_21_50 C_51_100 D_101_250 E_251_500 F_501_1000 G_1001_2000"
	local binFloor "1 21 51 101 251 501 1001"
	local binCeil "20 50 100 250 500 1000 2000"

	forvalues b = 1/7 {
		local binName : word `b' of `binNames'
		local lo : word `b' of `binFloor'
		local hi : word `b' of `binCeil'
		replace rankBin = "`binName'" if inrange(world_rank, `lo', `hi')
	}

	* Universities with a known country but no world rank
	replace rankBin = "H_UNRANKED" if universitycountry != "" & world_rank == .

	* National rank as a share of the number of universities in the country
	generate national_rank_pct = national_rank / numberuniversities

	*------------------------------------------
	* Create Major of Study Bins
	*------------------------------------------

	generate majorStem = ""
	replace majorStem = "Stem" if inlist(grpmajor, "Biological Sciences", "Engineering", "Physical Sciences", "Technology")
	replace majorStem = "NonStem" if inlist(grpmajor, "Business", "Social Sciences", "Arts and Humanities", "Communication", "Education", "Health Service", "Social Service")
	
	*-------------------------------- 
	* Drop Singapore Polytechnics which aren't considered by WHED
	*--------------------------------

	drop if inlist(school, "Nanyang Polytechnic", "Ngee Ann Polytechnic", "Republic Polytechnic", ///
		"Singapore Polytechnic", "Temasek Polytechnic")

	*--------------------------------
	* Flag whether the wage is earned in the country of the university
	*--------------------------------

	* Both flags are 0 when the university country is unknown
	generate home_country = (universitycountry != "") & (countryname == universitycountry)

	generate foreign_country = (universitycountry != "") & (countryname != universitycountry)

	*--------------------------------
	* Generate indicators for valid in education analysis
	*--------------------------------

	* For first degree

	* Degree field is filled in, matched, and above high school
	generate hasDegree = degree != "UNMATCHED" & degree != "missing" & degree != ""  & degree != "HIGHSCHOOL"

	* First degree is a bachelor degree
	generate uniDegree = degree == "BACHELORS"  
	
	generate hasSchool = school != ""

	* For second degree

	* Same test as hasDegree; every condition refers to degree_2, including
	* the high-school exclusion
	generate hasDegree_2 = degree_2 != "UNMATCHED" & degree_2 != "missing" & degree_2 != ""  & degree_2 != "HIGHSCHOOL"

	* Second degree is none of the listed categories. As written this is also
	* 1 when degree_2 is empty or unmatched.
	generate uniDegree_2 = degree_2 != "ASSOCIATES" & degree_2 != "DIPLOMA"  & degree_2 != "HIGHSCHOOL" & degree_2 != "BACHELORS"

	generate hasSchool_2 = school_2 != ""

	* Label an absent second degree explicitly (after the flags above are set)
	replace degree_2 = "NONE" if degree_2 == ""

	*--------------------------------
	* Attach ISO code and log GDP per worker of the country of work
	*--------------------------------

	* The lookup is keyed on country_glassdoor, so copy the key in temporarily
	generate country_glassdoor = countryname
	merge m:1 country_glassdoor using "`tempPath'/Country_gdppw.dta" , nogenerate
	drop country_glassdoor

	rename log_gdppw_uc log_gdppw_work_country
	rename iso work_country_iso

	*--------------------------------
	* Attach ISO code and log GDP per worker of the country of study
	*--------------------------------

	generate country_glassdoor = universitycountry
	merge m:1 country_glassdoor using "`tempPath'/Country_gdppw.dta" , nogenerate
	drop country_glassdoor

	rename log_gdppw_uc log_gdppw_university_country
	rename iso university_country_iso

	*--------------------------------
	* Restrict to the usable observations
	*--------------------------------

	* No self-employed reports and no salary outliers
	keep if employertypecode != "SELF_EMPLOYED"

	keep if !outside_thresh

	* Bachelor degree from a named school in a known country
	generate valid_educ = uniDegree & hasDegree & hasSchool & universitycountry != ""
	drop if !valid_educ

	********************************************************
	* SAMPLE SIZE FOR COUNTRY PREMIA Z_C (MOVERS)
	********************************************************

	*--------------------------------
	* Determine top destinations
	*--------------------------------

	* Order the salary reports of each user
	sort fk_userid yearofsalary dateval salid

	* Country of the next report by the same user (empty on the last report)
	by fk_userid: generate destination = countryname[_n+1]

	* 1 when the next report is filed from a different country
	generate migrant = destination != countryname & destination != ""

	* Count the moves into each destination and keep that count on a single
	* row per destination, so the total below counts every move once
	sort destination 
	by destination : egen destinationMigrants = sum(migrant)
	by destination : replace destinationMigrants = . if destination == ""
	by destination : replace destinationMigrants = . if _n > 1

	* Negate the counts so the ascending sort lists the largest destination first
	replace destinationMigrants = destinationMigrants * -1

	sort destinationMigrants 

	* Determine top destination (1.0% of migrants)

	* The counts are negative here, so flip the sign back when totalling
	egen totalMigrants = sum(destinationMigrants * -1)
		
	generate migrant_share = -1 * destinationMigrants / totalMigrants
		
	generate top_destination = migrant_share >= 0.01 & migrant_share != .

	* Spread the flag from the single counted row to every row of the destination
	bys destination : egen topDestination = max(top_destination)

	* Only the spread-out flag is needed from here on
	drop destinationMigrants top_destination

	*--------------------------------
	* Determine # of migrants to top destinations (share rule above) for each origin
	*--------------------------------

	* Moves out of each country, overall and into top destinations only
	bys countryname : egen migrantsOverall = sum(migrant)  

	bys countryname : egen migrantsTop = sum(migrant * topDestination)  

	* Origin countries with enough moves into top destinations
	generate valid_country_premia = migrantsTop >= country_premia_thresh & migrantsTop != .

	********************************************************
	* ESTIMATE SKILL LOSS
	********************************************************

	* Order the salary reports of each user
	sort fk_userid yearofsalary dateval salid
	 
	* Country of the previous report by the same user (empty on the first report)
	by fk_userid: gen prev_country = countryname[_n-1]

	*--------------------------------
	* Determine if migrated
	*--------------------------------

	* 1 on a report filed from a different country than the previous report
	generate migrated = countryname != prev_country & prev_country != ""

	* Running count of moves by the user up to and including this report
	by fk_userid: gen already_migrated = sum(migrated)

	*--------------------------------
	* Generate variable capturing delta_gdppw for migrants - between countries of work
	*--------------------------------

	* Absolute gap in log GDP per worker between this and the previous report
	by fk_userid: gen prev_gdp = log_gdppw_work_country[_n-1]
	generate delta_log_gdppw = abs(log_gdppw_work_country - prev_gdp)

	* Zero before the first move; the gap itself on the report that follows a move
	generate migrant_delta_gdppw_work = 0
	replace migrant_delta_gdppw_work = delta_log_gdppw if migrated == 1

	* Carry the gap of the latest move forward over the later reports filed
	* without a further move. replace works through the observations in order,
	* so a single lagged replace cascades down the spell whatever its length
	* (rows before any move inherit the initial 0).
	by fk_userid: replace migrant_delta_gdppw_work = migrant_delta_gdppw_work[_n-1] if migrated == 0 & _n > 1

	*--------------------------------
	* Generate coarse destination FE for migration 
	*	Keep top destinations alone, group others by continent
	*--------------------------------

		* Top destinations
		
		* Build a one-row-per-country lookup of the top-destination flag
		preserve

			keep destination topDestination
			rename destination countryname
			rename topDestination is_topDestination
			
			bys countryname : keep if _n == 1
			
			tempfile in_results
			save `in_results', emptyok
			
		restore

		* Attach the flag by country of work; lookup-only rows are discarded
		merge m:1 countryname using `in_results'
		drop if _merge == 2
		drop _merge

		* A top destination is its own category
		generate coarseDestination = ""
		replace coarseDestination = countryname if is_topDestination == 1
		drop is_topDestination

		* Continents
		
		* Build a country-to-continent lookup from columns v2 and v7 of the csv
		preserve

			clear
			
			insheet using "`inputPath'/Country_continents.csv" , comma
			
			rename v2 countryname
			rename v7 continent
			keep countryname continent
			
			bys countryname : keep if _n == 1
			
			* Rename the countries listed below to the spelling used in
			* countryname, exactly as the first pass of this do-file does.
			* Without it these countries get no continent here, and the
			* regression below would not use the grouping the bootstrap
			* first stage used.
			replace countryname = "Czech Republic" if countryname == "Czechia"
			replace countryname = "Iran" if countryname == "Iran (Islamic Republic of)"
			replace countryname = "Russia" if countryname == "Russian Federation"
			replace countryname = "Hong Kong" if countryname == "China, Hong Kong Special Administrative Region"
			replace countryname = "South Korea" if countryname == "Republic of Korea"
			replace countryname = "United Kingdom" if countryname == "United Kingdom of Great Britain and Northern Ireland"
			replace countryname = "United States" if countryname == "United States of America"
			replace countryname = "Vietnam" if countryname == "Viet Nam"
			
			* Reuses the tempfile name; the top-destination lookup is no longer needed
			tempfile in_results
			save `in_results', emptyok
			
		restore

		merge m:1 countryname using `in_results'
		drop if _merge == 2
		drop _merge

		* Remaining countries of the premia sample are grouped by continent
		replace coarseDestination = continent if coarseDestination == "" & valid_country_premia
		
		* Destination FE
		
		* One dummy per category, switched off for users who have not moved yet
		tab coarseDestination if valid_country_premia == 1, gen(c_)
		foreach my_var of varlist c_*{
			replace `my_var' = 0 if ~(migrated | already_migrated >= 1)
		}	
		
	*--------------------------------
	* Get countries with estimated premia
	*--------------------------------

	* Full-sample first stage; the country fixed effect is stored in fe_country
	reghdfe logbase exp exp_sqrd migrant_delta_gdppw_work if valid_country_premia , absorb(fk_userid fe_country=countryname coarseDestination yearofsalary ) vce(cluster countryname)
	
	* Number of estimation-sample rows per country
	bys countryname : egen obs_step1 =sum(e(sample))

	* Copy the estimated country effect to every row of the country
	bys countryname : egen z_c = max(fe_country)

	generate hasCountryPremia = z_c != .
	
	* NOTE(review): the next line looks like a leftover comment terminator
	* with no matching opener - confirm it can be deleted
	*/
	*--------------------------------
	* Keep only education sample
	*--------------------------------

	keep if valid_educ & hasCountryPremia & school != ""

	*--------------------------------
	* Determine step 2 sample thresholds 
	*--------------------------------

	* Observations per school, overall and among home-country workers
	bys school: egen schoolObs = sum(hasCountryPremia & valid_educ)

	bys school: egen schoolObsLocal = sum(hasCountryPremia & valid_educ & home_country)

	* Schools with enough home-country observations
	generate valid_school = schoolObsLocal >= school_thresh 
	
	* Merge in gdppw

	* Brings in log_gdppw_uc keyed on the country of the university
	generate country_glassdoor = universitycountry

	merge m:1 country_glassdoor using "`tempPath'/Country_gdppw.dta"

	drop if _merge == 2
	drop _merge

********************************************************
* STEP 3: GET SAMPLE OF COLLEGE GRADUATES AND CREATE BOOTSTRAP SAMPLES
********************************************************

* Wage net of the full-sample country premium
generate logbase_minus_z_c = logbase - z_c //- tau
		
* Baseline second-stage regression, run only to learn its estimation sample
reghdfe logbase_minus_z_c exp exp_sqrd if valid_educ & valid_school & home_country & universitycountry != "" , absorb(school yearofsalary)

* -1 inside the estimation sample so that those rows sort first
generate educationSample = e(sample) * -1

* Row number within the estimation sample, missing outside it
bys educationSample: gen id_education = _n
replace id_education = . if educationSample == 0

preserve

	* CREATE BOOTSTRAP SAMPLES BY VECTORS OF EDUCATION IDS
	
	keep if id_education != .
	
	keep id_education
	
	summarize id_education, meanonly
	scalar educationSampleSize = r(max)

	rename id_education old_education_id
	
	forvalues ii = 1/`simulations' {
		
		* Draw N observation ids with replacement. floor(N * u) + 1 puts
		* equal probability on each of 1..N; rounding (N - 1) * u would give
		* the two end ids only half the weight of every other id.
		generate random_id_`ii' = floor(educationSampleSize * runiform()) + 1
		
		save "`tempPath'/Education_ids_`ii'.dta" , replace
		
		drop random_id_`ii' 

	}
	
	* COLLAPSE TO TOTAL INSTANCES FOR EACH EDUCATION OBSERVATION
	
	clear
	
	forvalues ii = 1/`simulations' {
		
		use "`tempPath'/Education_ids_`ii'.dta" , clear
		
		* Number of times each id was drawn, one row per drawn id
		bys random_id_`ii': gen count_id_`ii' = _N
		
		bys random_id_`ii': keep if _n == 1
		
		save "`tempPath'/Education_ids_`ii'.dta" , replace
		
	}
	
restore

********************************************************
* STEP 4: DROP VARIABLES NOT NEEDED IN THE BOOTSTRAP LOOP
********************************************************
	
	* The dataset is preserved and restored once per replication in the
	* loop further down
	drop state birthyear highesteducation employertypecode
	drop workercountries
	drop *yearly
	drop startschool endschool *_2
	drop *_scorecard
	drop inventor *_cites *_patents
	drop jobtitle basepayperiodcode 
	drop gender hasbonus countrycurrencyshare
	drop score 
	drop degreescorecard nation
	drop destination migrant topDestination migrantsOverall migrantsTop 
	drop rankBin basesalary gpa 
	drop fk_resumeid fk_employerid 
	drop work_country_iso university_country_iso
	drop country_glassdoor iso 
	* schoolObs, schoolObsLocal and valid_school are rebuilt inside the loop
	drop schoolObs schoolObsLocal 
	drop gdppw hasDegree uniDegree 
	drop majorStem ppp_xrat 
	drop valid_school 
					
	********************************************************
	* SETUP FOR LOOPING OVER SAMPLES
	********************************************************

	bys countryname: generate newCountry = _n == 1

	* The bootstrap results are stored in the rows of this dataset: row ii
	* receives the estimates of replication ii, so the dataset needs at
	* least as many rows as there are replications
	generate draw = _n

	generate draw_pct_5 = .

	generate draw_low = .
	
	* One pass per bootstrap replication: rebuild the resampled education
	* sample, re-estimate the school effects net of the country premia of the
	* same replication, and store the two GDP elasticities in row ii
	foreach ii of numlist 1/`simulations'{
		
		disp "Iteration number: `ii'"
		
		preserve
			
			********************************************************
			* Read in education identifiers to use for current iteration
			********************************************************
			
			* Key used by the draw file of this replication
			gen random_id_`ii' = id_education
		
			merge m:1 random_id_`ii' using  "`tempPath'\Education_ids_`ii'.dta"

			* Keep the drawn observations only
			keep if _merge == 3
			drop _merge
			
			* Repeat each observation as many times as it was drawn
			expand count_id_`ii'	
			
			********************************************************
			* Read in country premia
			********************************************************
			
			merge m:1 countryname using "`tempPath'\Estimate_z_c_`ii'.dta"
			
			* Countries without an estimate in this replication drop out
			keep if _merge == 3
			drop _merge
			
			generate logbase_minus_z_c_`ii' = logbase - z_c_`ii' 
			
			keep if logbase_minus_z_c_`ii' != .
			
			********************************************************
			* DETERMINE IF SCHOOL HAS SAMPLE SIZE
			********************************************************
					
			* Recount on the resampled data
			bys school: egen schoolObs = sum(valid_educ)

			bys school: egen schoolObsLocal = sum(valid_educ & home_country)

			generate valid_school = schoolObsLocal >= school_thresh 
			
			* NOTE(review): the next line looks like a leftover comment
			* terminator with no matching opener - confirm it can be deleted
			*/
			********************************************************
			* ESTIMATE SCHOOL FE CONTROLLING FOR Z_C AND SELECTION 
			********************************************************
			
			* The school fixed effect is stored in fe_school
			reghdfe logbase_minus_z_c_`ii' exp exp_sqrd if valid_educ & valid_school & home_country & universitycountry != "" , absorb(fe_school=school yearofsalary)

			* Copy the estimated school effect to every row of the school
			bys school : egen q_j = max(fe_school)
				
			********************************************************
			* MAKE ALL Q_J RELATIVE TO UT AUSTIN
			********************************************************

			foreach my_var of varlist q_j*{
				
				generate utaustin = `my_var' if school == "The University of Texas at Austin"
				
				egen std_utaustin = max(utaustin)
				
				* q_j becomes missing everywhere if the reference school has no
				* estimate in this replication
				replace `my_var' = `my_var' - std_utaustin
				
				drop utaustin std_utaustin
			}
					
			********************************************************
			* CALCULATE ELASTICITIES WITH GDP
			********************************************************
				
			keep if q_j != .
			
			* One row per school, and a marker for one row per university country
			bys universitycountry school: keep if _n == 1

			bys universitycountry : generate newUniversityCountry = _n == 1
		
			* TOP 5% AVG Q_J

			foreach topUniversityThresh of numlist 5 {
				
				disp `topUniversityThresh'
			
				generate pct_thresh = `topUniversityThresh' / 100
				
				* Country mean of q_j over schools within the top share by
				* national rank, then copied to every row of the country
				bys universitycountry : egen avg_q_j_`topUniversityThresh' = mean(q_j) if national_rank_pct <= pct_thresh & national_rank != .

				bys universitycountry : egen avg_pct_`topUniversityThresh' = max(avg_q_j_`topUniversityThresh')
				
				drop pct_thresh
				
			} 
									
			* NOT TOP 5% 

			* Country mean of q_j over the remaining schools, unranked included
			bys universitycountry : egen avg_q_j_low = mean(q_j) if national_rank_pct > 0.05 | national_rank == .
				
			bys universitycountry : egen avg_low = max(avg_q_j_low)
					
			* ELASTICITIES

			* Cross-country slopes on log GDP per worker, one row per country
			reg avg_pct_5 log_gdppw_uc if newUniversityCountry 
		
				scalar next_pct_5 = _b["log_gdppw_uc"]
				
			reg avg_low log_gdppw_uc if newUniversityCountry 
			
				scalar next_low = _b["log_gdppw_uc"]
				
		restore
			
		********************************************************
		* UPDATE SAVED ESTIMATES
		********************************************************

		* The scalars survive restore; write them into row ii
		replace draw_pct_5 = next_pct_5 if draw == `ii'
		
		replace draw_low = next_low if draw == `ii'
		
	}
		
sort draw

********************************************************
* Write the bootstrap elasticities to csv
********************************************************

preserve

	* Only the rows that received an estimate
	drop if missing(draw_pct_5)

	keep draw draw_pct_5 draw_low

	outsheet using "`estimatePath'\Bootstrap_elasticities.csv" , comma replace

restore
	
********************************************************
* Plot Bootstrap Simulations 
********************************************************
	
* Figure B5a: top-school elasticity
* Lower dashed line: median of the draws at or below the 5th percentile
summarize draw_pct_5 , detail
summarize draw_pct_5 if draw_pct_5 <= r(p5), detail
local line_lb = r(p50)

* Upper dashed line: median of the draws at or above the 95th percentile
summarize draw_pct_5 , detail
summarize draw_pct_5 if draw_pct_5 >= r(p95), detail
local line_ub = r(p50)

* Kernel density of the draws; the axis ranges are fixed by hand
twoway kdensity draw_pct_5 , ///
		ytitle("frequency") xtitle("elasticity of top college graduate quality to log(gdppw)") lcolor("gs4") ///
		xline(`line_lb', lcolor(black) lpattern(dash)) ///
		xline(`line_ub', lcolor(black) lpattern(dash)) ///
		xscale(range(0.19 0.26)) xlabel(0.19(0.01)0.26)  ///
		yscale(range(0 40)) ylabel(0(10)40) 
graph export "`figurePath'\Figure_B5a.eps" , replace
	
* Figure B5b: same construction for the non-top-school elasticity
summarize draw_low , detail
summarize draw_low if draw_low <= r(p5), detail
local line_lb = r(p50)

summarize draw_low , detail
summarize draw_low if draw_low >= r(p95), detail
local line_ub = r(p50)

twoway kdensity draw_low , ///
		ytitle("frequency") xtitle("elasticity of non-top college graduate quality to log(gdppw)") lcolor("gs8") ///
		xline(`line_lb', lcolor(black) lpattern(dash)) ///
		xline(`line_ub', lcolor(black) lpattern(dash)) ///
		xscale(range(0.24 0.31)) xlabel(0.24(0.01)0.31)  ///
		yscale(range(0 40)) ylabel(0(10)40) 
graph export "`figurePath'\Figure_B5b.eps" , replace

********************************************************
* Delete Temp Data Files
********************************************************
	
* Remove the three per-replication files. The paths are quoted so that a
* root folder containing spaces does not split the file name.
* Country_gdppw.dta is left in place.
forvalues ii = 1/`simulations' {
	
	erase "`tempPath'/Education_ids_`ii'.dta"
	
	erase "`tempPath'/Migrant_ids_`ii'.dta"
	
	erase "`tempPath'/Estimate_z_c_`ii'.dta"
	
}
	
	
	
